#
# Copyright 2008 Zuza Software Foundation
#
# This file is part of translate.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""
Fill localization files with suggested translations based on
translation memory and existing translations.

See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/pretranslate.html
for examples and usage instructions.
"""

from translate.convert import convert
from translate.search import match
from translate.storage import factory

# We don't want to reinitialise the TM each time, so let's store it here.
tmmatcher = None


def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
    """Returns the TM store to use. Only initialises on first call."""
    global tmmatcher  # noqa: PLW0603
    # Only initialise first time
    if tmmatcher is None:
        if isinstance(tmfiles, list):
            tmstore = [factory.getobject(tmfile) for tmfile in tmfiles]
        else:
            tmstore = factory.getobject(tmfiles)
        tmmatcher = match.matcher(
            tmstore,
            max_candidates=max_candidates,
            min_similarity=min_similarity,
            max_length=max_length,
        )
    return tmmatcher


def pretranslate_file(
    input_file,
    output_file,
    template_file,
    tm=None,
    min_similarity=75,
    fuzzymatching=True,
):
    """
    Pretranslate any factory supported file with old translations and
    translation memory.
    """
    input_store = factory.getobject(input_file)
    template_store = None
    if template_file is not None:
        template_store = factory.getobject(template_file)

    output = pretranslate_store(
        input_store, template_store, tm, min_similarity, fuzzymatching
    )
    output.serialize(output_file)
    return 1


def match_template_location(input_unit, template_store):
    """Returns a matching unit from a template. matching based on locations."""
    # we want to use slightly different matching strategies for PO files
    # generated by our own moz2po and oo2po. Let's take a cheap shot at
    # detecting them from the presence of a ':' in the first location.
    locations = input_unit.getlocations()
    if not locations or ":" in locations[0]:
        return match_template_id(input_unit, template_store)

    # since oo2po and moz2po use location as unique identifiers for strings
    # we match against location first, then check for matching source strings
    # this makes no sense for normal gettext files
    for location in locations:
        matching_unit = template_store.locationindex.get(location, None)
        if (
            matching_unit is not None
            and matching_unit.source == input_unit.source
            and matching_unit.gettargetlen() > 0
        ):
            return matching_unit
    return None


def match_template_id(input_unit, template_store):
    """Returns a matching unit from a template. matching based on unit id."""
    return template_store.findid(input_unit.getid())


def match_source(input_unit, template_store):
    """Returns a matching unit from a template. matching based on unit id."""
    # hack for weird mozilla single letter strings, we don't want to
    # match them by anything but locations
    if len(input_unit.source) > 1:
        return template_store.findunit(input_unit.source)
    return None


def match_fuzzy(input_unit, matchers):
    """Return a fuzzy match from a queue of matchers."""
    for matcher in matchers:
        fuzzycandidates = matcher.matches(input_unit.source)
        if fuzzycandidates:
            return fuzzycandidates[0]
    return None


def pretranslate_unit(
    input_unit, template_store, matchers=None, mark_reused=False, merge_on="id"
):
    """
    Pretranslate a unit or return unchanged if no translation was found.

    :param input_unit: Unit that will be pretranslated.
    :param template_store: Fill input unit with units matching in this store.
    :param matchers: List of fuzzy :class:`~translate.search.match.matcher`
        objects.
    :param mark_reused: Whether to mark old translations as reused or not.
    :param merge_on: Where will the merge matching happen on.
    """
    matching_unit = None

    # Do template matching
    if template_store:
        # :param:`merge_on` supports `location` and `id` for now
        if merge_on == "location":
            matching_unit = match_template_location(input_unit, template_store)
        else:
            matching_unit = match_template_id(input_unit, template_store)

    if matching_unit and matching_unit.gettargetlen() > 0:
        input_unit.merge(matching_unit, authoritative=True)
    elif matchers:
        # quickly try exact match by source
        matching_unit = match_source(input_unit, template_store)

        if not matching_unit or not matching_unit.gettargetlen():
            # do fuzzy matching
            matching_unit = match_fuzzy(input_unit, matchers)

        if matching_unit and matching_unit.gettargetlen() > 0:
            # FIXME: should we dispatch here instead of this crude attr check
            if hasattr(input_unit, "addalttrans"):
                # FIXME: what about origin, lang and matchquality
                input_unit.addalttrans(
                    matching_unit.target, origin="fish", sourcetxt=matching_unit.source
                )
            else:
                input_unit.merge(matching_unit, authoritative=True)

    # FIXME: ugly hack required by pot2po to mark old
    # translations reused for new file. loops over
    if mark_reused and matching_unit and template_store:
        original_unit = template_store.findunit(matching_unit.source)
        if original_unit is not None:
            original_unit.reused = True

    return input_unit


def pretranslate_store(
    input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True
):
    """Do the actual pretranslation of a whole store."""
    # preparation
    matchers = []
    # prepare template
    if template_store is not None:
        template_store.makeindex()
        # template preparation based on type
        prepare_template = f"prepare_template_{template_store.__class__.__name__}"
        if prepare_template in globals():
            globals()[prepare_template](template_store)

        if fuzzymatching:
            # create template matcher
            # FIXME: max_length hardcoded
            matcher = match.matcher(
                template_store,
                max_candidates=1,
                min_similarity=min_similarity,
                max_length=3000,
                usefuzzy=True,
            )
            matcher.addpercentage = False
            matchers.append(matcher)

    # prepare tm
    # create tm matcher
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        matcher = memory(
            tm, max_candidates=1, min_similarity=min_similarity, max_length=1000
        )
        matcher.addpercentage = False
        matchers.append(matcher)

    # Main loop
    for input_unit in input_store.units:
        if input_unit.istranslatable():
            pretranslate_unit(
                input_unit, template_store, matchers, merge_on=input_store.merge_on
            )

    return input_store


def main(argv=None):
    formats = {
        "pot": ("po", pretranslate_file),
        ("pot", "po"): ("po", pretranslate_file),
        "po": ("po", pretranslate_file),
        ("po", "po"): ("po", pretranslate_file),
        "xlf": ("xlf", pretranslate_file),
        ("xlf", "xlf"): ("xlf", pretranslate_file),
        "xliff": ("xliff", pretranslate_file),
        ("xliff", "xliff"): ("xliff", pretranslate_file),
    }
    parser = convert.ConvertOptionParser(
        formats, usetemplates=True, allowmissingtemplate=True, description=__doc__
    )
    parser.add_option(
        "",
        "--tm",
        dest="tm",
        default=None,
        help="The file to use as translation memory when fuzzy matching",
    )
    parser.passthrough.append("tm")
    defaultsimilarity = 75
    parser.add_option(
        "-s",
        "--similarity",
        dest="min_similarity",
        default=defaultsimilarity,
        type="float",
        help=f"The minimum similarity for inclusion (default: {defaultsimilarity}%)",
    )
    parser.passthrough.append("min_similarity")
    parser.add_option(
        "--nofuzzymatching",
        dest="fuzzymatching",
        action="store_false",
        default=True,
        help="Disable fuzzy matching",
    )
    parser.passthrough.append("fuzzymatching")
    parser.run(argv)


if __name__ == "__main__":
    main()
